*********  Run the "Clean Dataset" do-file first  ***********
// This file contains the code for Figures D.1, D.2 and D.3, Tables D.1 and D.2,
// the calculations for "Appendix F", and the test of discrimination in Part 4.


// 1. Statistics on choice of name, score and gender
// 1.1 Observations whose gender variable differs from their sex variable:
//     total mismatches first, then each direction of mismatch separately
count if sex != gender
count if sex == 1 & gender == 0
count if sex == 0 & gender == 1

// Descriptive statistics (min, max, mean, sd, N) by treatment,
// with one column per statistic
local descvars gender alder score1 score2 score3 score4 score5 score6 score7
tabstat `descvars', by(treatment) ///
	statistics(min max mean sd N) columns(statistics)


****** Figure D.1 **********
// 1.2 Graphs of distributions by gender
// 1.2.1 Cumulative distribution of the individual score in each quiz (1-7),
//       one curve per value of gender

// Install cdfplot from SSC only when it is not already on the ado-path, so
// the script does not need network access on every run.
capture which cdfplot
if _rc {
	ssc install cdfplot
}

// One panel per quiz, held in memory (nodraw) under the names
// scfigure1 ... scfigure7. name(, replace) allows the script to be rerun in
// the same Stata session without failing on an already existing graph name.
forvalues q = 1/7 {
	cdfplot score`q', by(gender) ytitle("") ///
		xlab(, grid) xtitle("Score") ///
		title("Cumulatives:" "Quiz `q'") nodraw ///
		name(scfigure`q', replace)
}

// Combine the seven panels on common x and y axes.
// NOTE(review): the combined graph is also built with nodraw and is neither
// named nor exported here, so nothing is displayed by this section - confirm
// that the figure is displayed/exported elsewhere.
gr combine scfigure1 scfigure2 scfigure3 ///
	scfigure4 scfigure5 scfigure6 scfigure7, xcommon ycommon nodraw

******** Figure D2 and D3

// 1.2.2 Cumulative distributions by gender of the total individual score
//       (totscore) and of the total pair score (totparscore), quiz 1-5.
// Both graphs are kept in memory (nodraw) as totfigure1 and totfigure2;
// name(, replace) lets the section be rerun in the same session without
// failing because the graph name is already in use.
// Requires the user-written cdfplot command (SSC).
cdfplot totscore, by(gender) ytitle("") ///
	xlab(, grid) xtitle("Score") ///
	title("Cumulatives:" "Total Individual Score, Quiz 1-5") nodraw ///
	name(totfigure1, replace)

cdfplot totparscore, by(gender) ytitle("") nodraw ///
	xlab(, grid) xtitle("Score") ///
	title("Cumulatives:" "Total Score of Pairs, Quiz 1-5") ///
	name(totfigure2, replace)

****** Table D1 and D2   ********

// 1.3 Differences in distributions (results can be tabulated).
// For each grouping variable (first sex, then gender) run, over all score
// variables, the two-sample Kolmogorov-Smirnov test followed by the
// Wilcoxon rank-sum test.
local outcomes score1 score2 score3 score4 score5 score6 score7 totscore totparscore

foreach grp in sex gender {
	foreach y of local outcomes {
		// commands inside a loop are not echoed, so print what is being run
		display as text _n ". ksmirnov `y', by(`grp')"
		ksmirnov `y', by(`grp')
	}
	foreach y of local outcomes {
		display as text _n ". ranksum `y', by(`grp')"
		ranksum `y', by(`grp')
	}
}

// Looking at differences in means: summarize each score separately for
// gender==1 and gender==0 (same variable order as before, with totscore
// placed between score5 and score6)
foreach y in score1 score2 score3 score4 score5 totscore score6 score7 {
	foreach g in 1 0 {
		// commands inside a loop are not echoed, so print what is being run
		display as text _n ". sum `y' if gender==`g'"
		summarize `y' if gender == `g'
	}
}

//decisions part2
//gender of chosen ones in part 2: number of genk1/genk2 equal to 1 (0, 1 or 2).
//Any other value, including missing, contributes 0.
gen femchos = (genk1 == 1) + (genk2 == 1)

//rank of candidates' total parscores in part 2
//Install rowranks from SSC only when it is not already on the ado-path, so
//the script does not need network access on every run.
capture which rowranks
if _rc {
	ssc install rowranks
}
//field: the highest score gets rank 1
rowranks totparscorek1-totparscorek4, generate(rankk1-rankk4) field

//number of females among top two
//gentop1: gender of the candidate in first place (rank <= 1.5)
gen gentop1 = 0
forvalues i = 1/4 {
	replace gentop1 = genk`i' if rankk`i' <= 1.5
}

//Two candidates both ranked exactly 1.5 are treated as sharing first place:
//count both of them (assumes rowranks assigns tied values their average
//rank - confirm against the rowranks help file).
forvalues i = 1/3 {
	local next = `i' + 1
	forvalues j = `next'/4 {
		replace gentop1 = genk`i' + genk`j' if rankk`i' == 1.5 & rankk`j' == 1.5
	}
}

//gentop2: gender of the candidate in second place (1.5 < rank <= 2.5)
//NOTE(review): when several candidates fall in this rank interval (ties
//around second place) the highest-numbered candidate overwrites the others,
//and ties involving three or more candidates at the top are not covered by
//the rank-1.5 rule above - confirm that this is intended.
gen gentop2 = 0
forvalues i = 1/4 {
	replace gentop2 = genk`i' if rankk`i' <= 2.5 & rankk`i' > 1.5
}

gen femtop2 = gentop1 + gentop2
drop gentop1 gentop2

//number of females displayed
gen femdisp = genk1 + genk2 + genk3 + genk4

//number of chosen females per female displayed
//(missing when no female was displayed, since division by zero yields missing)
gen relfemchos = femchos / femdisp

//label variables decision2
label var rankk1 "Rank of candidate 1 part 2"
label var rankk2 "Rank of candidate 2 part2"
label var rankk3 "Rank of candidate 3 part2"
label var rankk4 "Rank of candidate 4 part2"
label var femtop2 "how many females among top 2 ranked"
label var femchos "number of females chosen part 2"
label var femdisp "number of females displayed to a subject part2"
label var relfemchos "number of females chosen relative to females displayed part2"

//Decisions part 3
//chose a female or not: take genk2_c of the candidate slot whose kand2_c
//equals valg (stays 0 when no slot matches)
gen chosfem = 0
forvalues c = 1/4 {
	replace chosfem = genk2_`c' if kand2_`c' == valg
}

//create the chosen variables: 1 for the slot whose kand2_c equals valg, else 0
forvalues c = 1/4 {
	gen chosen2_`c' = (kand2_`c' == valg)
}

//rank of the candidates (highest totscore gets rank 1) and of the chosen one
//requires the user-written rowranks command (SSC)
rowranks totscorek2_1-totscorek2_4, generate(rankk2_1-rankk2_4) field

gen rankchos = 0
forvalues c = 1/4 {
	replace rankchos = rankk2_`c' if kand2_`c' == valg
}

//Number of females displayed
gen femdisp3 = genk2_1 + genk2_2 + genk2_3 + genk2_4

//relative score of the chosen one: chosen candidate's totscore divided by
//the mean totscore of the four candidates (mean held in a temporary variable)
tempvar meanscore
gen `meanscore' = (totscorek2_1 + totscorek2_2 + totscorek2_3 + totscorek2_4) / 4

gen relscore = 0
forvalues c = 1/4 {
	replace relscore = totscorek2_`c' / `meanscore' if kand2_`c' == valg
}

drop `meanscore'

//label variables decision3
label var chosfem "Gender of the chosen candidate part3"
forvalues c = 1/4 {
	label var rankk2_`c' "Rank of candidate `c' part3"
}
label var rankchos "rank of the chosen candidate part3"
label var femdisp3 "number of females displayed to a subject part3"
label var relscore "Score of the chosen relative to average among the 4 candiates part3"


//decisions part 4
//Number of females displayed (three candidates in this part)
gen femdisp4 = genk4_1 + genk4_2 + genk4_3

//gender of the chosen ones in part 4: for each of the two choice variables
//(flest, minst) take genk4_c of the slot whose kand4_c equals the choice
//(stays 0 when no slot matches)
foreach pick in flest minst {
	gen femchos`pick' = 0
	forvalues c = 1/3 {
		replace femchos`pick' = genk4_`c' if `pick' == kand4_`c'
	}
}

//relative score of the chosen ones part 4 (to show no relation between score and choice):
//chosen candidate's totscore divided by the mean totscore of the three
//candidates (mean held in a temporary variable)
tempvar meanscore
gen `meanscore' = (totscorek4_1 + totscorek4_2 + totscorek4_3) / 3

foreach pick in flest minst {
	gen relscor`pick' = 0
	forvalues c = 1/3 {
		replace relscor`pick' = totscorek4_`c' / `meanscore' if `pick' == kand4_`c'
	}
}

drop `meanscore'

//label variables
label var femchosflest "Gender of chosen candidate flest part4"
label var femchosminst "Gender of chosen candidate minst part4"
label var relscorflest "Score of chosen(flest) relative to average part4"
label var relscorminst "Score of chosen(minst) relative to average part4"






******* Calculations for section "Appendix F"

//post-hoc profits analysis
//scorek1/scorek2: for every observation, look up score6 of the observation
//whose id equals this observation's bortvalg1 / bortvalg2.
//summarize, meanonly leaves the value in r(min); when no id matches, r(min)
//is missing and so is the result.
forvalues k = 1/2 {
	gen scorek`k' = .
	quietly forvalues i = 1/`=_N' {
		summarize score6 if bortvalg`k'[`i'] == id, meanonly
		replace scorek`k' = r(min) in `i'
	}
}

gen scorek3 = scorev1
gen scorek4 = scorev2

//ranks of the four scores (highest gets rank 1); kept in the dataset as
//rankq1-rankq4. Install rowranks from SSC only if it is not yet available.
capture which rowranks
if _rc {
	ssc install rowranks
}
rowranks scorek1-scorek4, generate(rankq1-rankq4) field


//calculating the best score on quiz 6: the sum of the two highest of the
//four scores. The largest of all six pairwise sums equals that sum under any
//pattern of ties, so no observation needs to be patched by hand. (The earlier
//rank-based rules left some tied cases missing and filled them with
//hard-coded values for observations 10, 64, 88 and 133, which depended on
//the sort order of the data.)
//max() ignores missing arguments, so a pair containing a missing score is
//skipped; bestsum is missing only when fewer than two scores are available.
gen bestsum = max(scorek1 + scorek2, scorek1 + scorek3, scorek1 + scorek4, ///
	scorek2 + scorek3, scorek2 + scorek4, scorek3 + scorek4)


//compare the best attainable sum with scorevalgtpar, by treatment
gen differencia = bestsum - scorevalgtpar
sum differencia if treatment == 1
sum differencia if treatment == 0

//same difference multiplied by 3
//(presumably the payment per point - TODO confirm)
gen differenciaearn = differencia * 3
sum differenciaearn if treatment == 1
sum differenciaearn if treatment == 0

ttest differencia, by(treatment)
ttest differenciaearn, by(treatment)

 

// What happens to those who only choose the second best candidate?
// rankfem = 1 when the chosen candidate has rank 2 or worse (rankchos >= 2)
// while some candidate with rank exactly 1 has genk2_c equal to 1
gen rankfem = 0
forvalues c = 1/4 {
	replace rankfem = 1 if rankchos >= 2 & genk2_`c' == 1 & rankk2_`c' == 1
}

***** Test of differences in Part 4 *******

// Mean number of females displayed in part 4
summarize femdisp4
// Use the mean in the following bitest!
// NOTE(review): the null probability 0.57 is hard-coded; presumably it is
// the mean of femdisp4 above divided by the three displayed candidates -
// confirm and update by hand if the sample changes.

bitest femchosflest == 0.57
bitest femchosminst == 0.57







